* Reset settings and initialize log file
* NOTE(review): "launch" is not a built-in Stata command; presumably a project
* helper that pairs with the "unlaunch" call at the end of this script, with
* path() naming the log/output stem -- confirm against its definition.
launch, path("build/cps_asec_builder")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Prepare CPS ASEC data.
*-------------------------------------------------------------------------------


* Construct ASEC dataset
*-------------------------------------------------------------------------------

* Read the raw (gzipped) ASEC extract into memory
gzuse "$basepath/data/raw/cps/asec/$cps_asec.dta.gz", clear

* Every record must carry a valid ASEC identifier (neither missing nor zero)
assert !missing(marbasecidp)
assert marbasecidp != 0

* The extract must span exactly survey years 1989 through 2019
quietly summarize year, meanonly
local first_year = r(min)
local last_year  = r(max)
assert `first_year' == 1989
assert `last_year' == 2019

* Sample restrictions, applied one at a time so the log shows each drop count:
*   (1) prime-age respondents only (ages 25-49)
drop if !inrange(age, 25, 49)
*   (2) exclude the ASEC oversample (which is not linkable to the basic monthly CPS)
drop if asecoverp != 0
*   (3) exclude individuals not in the civilian employment universe
keep if !inlist(empstat, 0, 1)


* Process technical variables
*-------------------------------------------------------------------------------

* Build the monthly date (Stata %tm) from the survey year and month
generate int tm = ym(year, month)
format %tm tm

* Tag the household/person identifiers as coming from the ASEC
rename (cpsid cpsidp) (hid_asec pid_asec)

* Store the ASEC weight as a float to save space (force accepts precision loss)
recast float asecwt, force

* ID variables carried through to the final keep list
local idvars pid_asec hid_asec tm marbasecidp asecwt


* Process labor market variables
*-------------------------------------------------------------------------------

* Weeks worked last year must lie in [0, 52]
assert wkswork1 >= 0 & wkswork1 <= 52

* Usual hours worked last year: map the 999 code to zero, then check the range
* NOTE(review): 999 is presumably the not-in-universe code -- confirm in the codebook
replace uhrsworkly = 0 if uhrsworkly == 999
assert uhrsworkly >= 0 & uhrsworkly <= 99

* Map 3-digit occupation codes into 2-digit codes. The helper do-file is run
* with the variable temporarily named occ1990 and leaves occ1990_2d behind;
* both are then renamed back to the last-year naming.
rename occ90ly occ1990
quietly do "$basepath/code/build/create_occ1990_2d.do"
rename (occ1990 occ1990_2d) (occ90ly occ90ly_2d)

* Employment variables carried through to the final keep list
local empvars uhrsworkly wkswork1 occ90ly occ90ly_2d ind90ly


* Process earnings variables
* (https://cps.ipums.org/cps/topcodes_tables.shtml)
*-------------------------------------------------------------------------------

* Show min and max value for each survey year
tabstat incwage, by(year) stats(min max)

* Adjust pre-1996 topcoded incomes (Census already adjusts from 1996 onward):
* first confirm no 1989-1995 value exceeds 199998, then scale records sitting
* exactly at 199998 by a factor of 1.5
assert inrange(incwage, 0, 199998) if inrange(year, 1989, 1995) & incwage != .
replace incwage = 1.5 * incwage if inrange(year, 1989, 1995) & incwage == 199998

* Convert last year's earnings to Dec. 2019 dollars using PCE deflator.
* tm is shifted back 12 months only for the duration of the merge, so each record
* picks up the deflator dated one year before its survey month; tm is restored
* immediately afterwards. keepusing(pce) brings in only the deflator, so no other
* column stored in pce.dta can leak into the ASEC data. assert(2 3) requires every
* ASEC record to find a deflator; keep(3) discards unused deflator months.
replace tm = tm - 12
merge m:1 tm using "$basepath/data/derived/pce.dta", assert(2 3) keep(3) keepusing(pce) nogenerate
replace tm = tm + 12
replace incwage = incwage/pce

* Earnings variable (plain macro assignment, matching idvars and empvars)
local earnvars "incwage"


* Consolidate variables
*-------------------------------------------------------------------------------

* Reserve a temporary file for the cleaned ASEC records
tempfile asec

* Keep only the ID, employment, and earnings variables, in that column order
local keepvars `idvars' `empvars' `earnvars'
keep `keepvars'
order `keepvars'

* Declare the panel structure (person identifier by month)
tsset marbasecidp tm

* Write the ASEC records to the temporary file for the merge below
save "`asec'"


* Merge with main CPS sample
*-------------------------------------------------------------------------------

* Load only the March observations from the basic monthly CPS sample
gzuse if month == 3 using "$basepath/data/derived/cps_bms_sample.dta.gz", clear

* Every March record must have a non-missing ASEC link identifier
quietly count if missing(marbasecidp)
assert r(N) == 0

* Attach the ASEC variables, keeping only records present in both files
merge 1:1 marbasecidp tm using `asec', keep(match) nogenerate

* Cross-check that the ASEC person/household IDs agree with the monthly CPS IDs
foreach id in pid hid {
    assert `id'_asec == `id' if !missing(`id'_asec)
}

* The ASEC copies of the IDs are redundant once verified
drop pid_asec hid_asec


* Finalize extract
*-------------------------------------------------------------------------------

* Variable labels: weight
label var asecwt "ASEC person weight"

* Variable labels: labor supply and earnings last year
label var uhrsworkly "Usual hours worked per week last year"
label var wkswork1 "Weeks worked last year"
label var incwage "Wage and salary income last year"

* Variable labels: occupation and industry last year
label var occ90ly "Occupation last year, 1990 coding"
label var occ90ly_2d "Occupation last year, 1990 2-digit coding"
label var ind90ly "Industry last year, 1990 coding"

* Shrink storage types before writing to disk
compress

* Confirm person-month uniquely identifies observations, then sort on that key
gisid pid tm
sort pid tm

* Write the cleaned extract
gzsave "$basepath/data/derived/cps_asec.dta.gz", replace

* Close the log file
unlaunch
